﻿using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;

namespace QW.Core.Helper
{
    /// <summary>
    /// Html标签辅助类
    /// </summary>
    public class HtmlTagHelper
    {
        /// <summary>
        /// Removes script blocks and then style blocks from an HTML string.
        /// </summary>
        /// <param name="htmlText">HTML text to clean.</param>
        /// <returns>
        /// The result of passing <paramref name="htmlText"/> through
        /// <see cref="RemoveScriptTag"/> followed by <see cref="RemoveStyleTag"/>.
        /// </returns>
        public static string RemoveScriptsAndStyle(string htmlText)
        {
            string withoutScripts = RemoveScriptTag(htmlText);
            return RemoveStyleTag(withoutScripts);
        }
        /// <summary>
        /// Removes every whitespace character and every <c>&amp;nbsp;</c> entity from a string.
        /// <para>Whitespace is removed first, then the (case-insensitive) entity.</para>
        /// </summary>
        /// <param name="str">Text to strip.</param>
        /// <returns>The text with whitespace and non-breaking-space entities removed.</returns>
        public static string RemoveAllSpace(string str)
        {
            const RegexOptions options = RegexOptions.IgnoreCase;

            // Two separate passes on purpose: the second pass also sees entities
            // that only became contiguous after the whitespace was dropped.
            string withoutWhitespace = Regex.Replace(str, @"\s", string.Empty, options);
            return Regex.Replace(withoutWhitespace, @"&nbsp;", string.Empty, options);
        }
        /// <summary>
        /// Performs a minimal HTML encoding of plain text.
        /// <para>
        /// Ampersand, single quote, double quote, space, <c>&lt;</c> and <c>&gt;</c> are replaced by
        /// their entities, and every line break (<c>\r\n</c>, <c>\n</c> or <c>\r</c>) becomes a
        /// single <c>&lt;br&gt;</c>.
        /// </para>
        /// </summary>
        /// <param name="str">Source string.</param>
        /// <returns>The encoded string; a null or empty input is returned unchanged.</returns>
        public static string SimpleEncode(string str)
        {
            if (string.IsNullOrEmpty(str))
            {
                return str;
            }

            // "&" must go first so the entities produced below are not encoded again.
            str = str.Replace("&", "&amp;");
            str = str.Replace("'", "&#39;");
            str = str.Replace("\"", "&quot;");
            str = str.Replace(" ", "&nbsp;");
            str = str.Replace("<", "&lt;");
            str = str.Replace(">", "&gt;");

            // "\r\n" must be handled before the single characters, otherwise a
            // Windows line break is turned into two <br> elements.
            str = str.Replace("\r\n", "<br>");
            str = str.Replace("\n", "<br>");
            str = str.Replace("\r", "<br>");
            return str;
        }
        /// <summary>
        /// Reverses <see cref="SimpleEncode"/>: turns <c>&lt;br&gt;</c> into a line feed and the
        /// entities for <c>&gt;</c>, <c>&lt;</c>, space, double quote, single quote and ampersand
        /// back into their characters.
        /// </summary>
        /// <param name="str">String to decode.</param>
        /// <returns>The decoded string; a null or empty input is returned unchanged.</returns>
        public static string SimpleDecode(string str)
        {
            if (string.IsNullOrEmpty(str))
            {
                return str;
            }

            str = str.Replace("<br>", "\n");
            str = str.Replace("&gt;", ">");
            str = str.Replace("&lt;", "<");
            str = str.Replace("&nbsp;", " ");
            str = str.Replace("&quot;", "\"");
            str = str.Replace("&#39;", "'");

            // "&amp;" must be decoded last; decoding it earlier lets an encoded
            // ampersand combine with the following text and be decoded a second
            // time (e.g. "&amp;#39;" would wrongly end up as "'").
            str = str.Replace("&amp;", "&");
            return str;
        }

        #region 移除HTML标签
        /// <summary>
        /// Strips HTML markup from a string, keeping only the tags listed in
        /// <paramref name="unRemoveTags"/>.
        /// <para>
        /// Processing order: HTML comments are removed; then the doctype, <c>base</c>,
        /// <c>&lt;! ...&gt;</c> and <c>object</c> tags; then every <see cref="HtmlTagByAll"/> element
        /// together with its content; finally every <see cref="HtmlTag"/> tag, leaving its content in place.
        /// </para>
        /// <para>Tag names are matched case-insensitively.</para>
        /// </summary>
        /// <param name="html">HTML text to clean.</param>
        /// <param name="unRemoveTags">
        /// Names of the tags to keep. When null, <c>p</c>, <c>b</c>, <c>em</c>, <c>a</c>,
        /// <c>strong</c> and <c>img</c> are kept.
        /// </param>
        /// <returns>The cleaned text; a null or empty input is returned unchanged.</returns>
        public static string RemoveTag(string html, string[] unRemoveTags = null)
        {
            if (string.IsNullOrEmpty(html))
            {
                return html;
            }
            if (unRemoveTags == null)
            {
                unRemoveTags = new string[] { "p", "b", "em", "a", "strong", "img" };
            }
            string[] otherTags = new string[] { "!doctype html", "base", "!", "object" };

            // Strip comments, including multi-line ones. The lazy quantifier stops
            // at the first "-->" so text between two comments is preserved.
            html = Regex.Replace(html, @"<!--[\s\S]*?-->", "");

            foreach (string tag in otherTags)
            {
                if (!unRemoveTags.Contains(tag, StringComparer.OrdinalIgnoreCase))
                {
                    html = Regex.Replace(html, TagRegex.Remove(tag), "", RegexOptions.IgnoreCase);
                }
            }

            // Elements whose content must disappear as well (script, style, ...).
            foreach (string tag in Enum.GetNames(typeof(HtmlTagByAll)))
            {
                if (!unRemoveTags.Contains(tag, StringComparer.OrdinalIgnoreCase))
                {
                    html = Regex.Replace(html, TagRegex.RemoveAll(tag), "", RegexOptions.IgnoreCase);
                }
            }

            // Ordinary tags: only the markup is dropped, the inner text stays.
            foreach (string tag in Enum.GetNames(typeof(HtmlTag)))
            {
                if (!unRemoveTags.Contains(tag, StringComparer.OrdinalIgnoreCase))
                {
                    html = Regex.Replace(html, TagRegex.Remove(tag), "", RegexOptions.IgnoreCase);
                }
            }
            return html;
        }


        /// <summary>
        /// Removes every <c>&lt;script&gt;...&lt;/script&gt;</c> block, including its content.
        /// <para>
        /// Matching is case-insensitive and spans line breaks. This is a regex-based clean-up,
        /// not an HTML parser, so it should not be relied on as the only defence against
        /// hostile markup.
        /// </para>
        /// </summary>
        /// <param name="htmlText">HTML text to clean.</param>
        /// <returns>The text without script blocks.</returns>
        public static string RemoveScriptTag(string htmlText)
        {
            // Singleline lets "." match "\n"; without it a script whose body spans
            // several lines is never matched and stays in the output.
            htmlText = Regex.Replace(
                htmlText,
                @"<\s*script[^>]*?>.*?<\s*/\s*script\s*>",
                "",
                RegexOptions.IgnoreCase | RegexOptions.Singleline);
            return htmlText;
        }
        /// <summary>
        /// Removes every <c>&lt;style&gt;...&lt;/style&gt;</c> block, including its content.
        /// <para>Matching is case-insensitive and spans line breaks.</para>
        /// </summary>
        /// <param name="htmlText">HTML text to clean.</param>
        /// <returns>The text without style blocks.</returns>
        public static string RemoveStyleTag(string htmlText)
        {
            // Singleline lets "." match "\n"; style sheets are almost always
            // multi-line and would otherwise be left untouched.
            htmlText = Regex.Replace(
                htmlText,
                @"<\s*style[^>]*?>.*?<\s*/\s*style\s*>",
                "",
                RegexOptions.IgnoreCase | RegexOptions.Singleline);
            return htmlText;
        }
        /// <summary>
        /// Removes opening and closing <c>div</c> tags while keeping the content between them.
        /// <para>
        /// Matching is case-insensitive, and the tag name may be followed by any whitespace
        /// (including a line break) before its attributes.
        /// </para>
        /// </summary>
        /// <param name="text">HTML text to clean.</param>
        /// <returns>The text without <c>div</c> tags.</returns>
        public static string RemoveDivTag(string text)
        {
            // The name must be followed by whitespace, "/" or ">" so that longer
            // names starting with "div" are left alone.
            text = Regex.Replace(text, @"</?div(?:\s[^<>]*)?/?>", "", RegexOptions.IgnoreCase);
            return text;
        }
        /// <summary>
        /// Removes every <c>&lt;iframe&gt;...&lt;/iframe&gt;</c> element, including its content.
        /// <para>
        /// Each iframe is matched on its own, so text located between two iframes is kept.
        /// Matching is case-insensitive.
        /// </para>
        /// </summary>
        /// <param name="text">HTML text to clean.</param>
        /// <returns>The text without iframe elements.</returns>
        public static string RemoveIframeTag(string text)
        {
            // Lazy "[\s\S]*?" stops at the first closing tag; a greedy match would
            // swallow everything up to the last </iframe> in the document.
            text = Regex.Replace(text, @"<iframe\b[^<>]*>[\s\S]*?</iframe\s*>", "", RegexOptions.IgnoreCase);
            return text;
        }
        #endregion

    }
    /// <summary>
    /// Builds the regular-expression patterns used to strip HTML tags.
    /// <para>
    /// The returned patterns carry their own case-insensitivity flag, because HTML tag
    /// names are not case-sensitive.
    /// </para>
    /// </summary>
    public class TagRegex
    {
        /// <summary>
        /// Builds a pattern matching a single opening, closing or self-closing tag
        /// (for example <c>&lt;br&gt;</c>, <c>&lt;/p&gt;</c>, <c>&lt;img src="x" /&gt;</c>).
        /// Content between tags is not part of the match.
        /// </summary>
        /// <param name="tag">Tag name; it is inserted into the pattern as-is, without escaping.</param>
        /// <returns>A regular-expression pattern string.</returns>
        public static string Remove(string tag)
        {
            // After the name only whitespace + attributes, an optional "/" and ">"
            // are accepted, so "a" does not match "<abbr>".
            return @"(?i:</?(?:" + tag + @")(?:\s[^<>]*)?/?>)";
        }

        /// <summary>
        /// Builds a pattern matching a whole element: the opening tag, everything up to the
        /// first matching closing tag, and the closing tag itself.
        /// </summary>
        /// <param name="tag">Tag name; it is inserted into the pattern as-is, without escaping.</param>
        /// <returns>A regular-expression pattern string.</returns>
        public static string RemoveAll(string tag)
        {
            // "[\s\S]*?" matches any character lazily (line breaks included) without
            // the per-character capture and alternation of "(\w|\W)*?".
            // The look-aheads require the name to end right there, so "s" cannot
            // open on "<span>".
            return @"(?i:<(?:" + tag + @")(?=[\s/>])[^>]*>[\s\S]*?</(?:" + tag + @")(?=[\s>])[^>]*>)";
        }
    }
    /// <summary>
    /// Tags that are removed in full, i.e. the element together with its content.
    /// The member names are used as the literal tag names.
    /// </summary>
    public enum HtmlTagByAll
    {
        /// <summary>
        /// Style information for the document.
        /// </summary>
        style,
        /// <summary>
        /// Inline frame.
        /// </summary>
        iframe,
        /// <summary>
        /// Client-side script.
        /// </summary>
        script,
    }
    /// <summary>
    /// Tags whose markup is removed while their content is kept.
    /// The member names are used as the literal tag names.
    /// <para>
    /// A few original members are misspelled (the letter "f" is missing) and therefore never
    /// match a real tag. They are kept so existing numeric values and references stay valid;
    /// the correctly spelled names are appended at the end of the enum.
    /// </para>
    /// </summary>
    public enum HtmlTag
    {
        /// <summary>Anchor.</summary>
        a,
        /// <summary>Abbreviation.</summary>
        abbr,
        /// <summary>Acronym.</summary>
        acronym,
        /// <summary>Contact information for the document's author or owner.</summary>
        address,
        /// <summary>Embedded applet (deprecated).</summary>
        applet,
        /// <summary>Hot-spot area inside an image map.</summary>
        area,
        /// <summary>Bold text.</summary>
        b,
        //base, // default address or default target for all links on the page
        /// <summary>Misspelling of <c>basefont</c>; kept for compatibility, see <see cref="basefont"/>.</summary>
        baseont,
        /// <summary>Text direction.</summary>
        bdo,
        /// <summary>Large text.</summary>
        big,
        /// <summary>Block quotation.</summary>
        blockquote,
        /// <summary>Document body.</summary>
        body,
        /// <summary>Line break.</summary>
        br,
        /// <summary>Button.</summary>
        button,
        /// <summary>Table caption.</summary>
        caption,
        /// <summary>Centered text.</summary>
        center,
        /// <summary>Citation.</summary>
        cite,
        /// <summary>Code text.</summary>
        code,
        /// <summary>Column.</summary>
        col,
        /// <summary>Column group.</summary>
        colgroup,
        /// <summary>Description of an item in a list.</summary>
        dd,
        /// <summary>Deleted text.</summary>
        del,
        /// <summary>Directory list (deprecated).</summary>
        dir,
        /// <summary>Division (layer).</summary>
        div,
        /// <summary>List.</summary>
        dl,
        /// <summary>Misspelling of <c>dfn</c>; kept for compatibility, see <see cref="dfn"/>.</summary>
        dn,
        //DOCTYPE, // document type
        /// <summary>Item in a list.</summary>
        dt,
        /// <summary>Emphasized text.</summary>
        em,
        /// <summary>Form.</summary>
        form,
        /// <summary>Heading level 1.</summary>
        h1,
        /// <summary>Heading level 2.</summary>
        h2,
        /// <summary>Heading level 3.</summary>
        h3,
        /// <summary>Heading level 4.</summary>
        h4,
        /// <summary>Heading level 5.</summary>
        h5,
        /// <summary>Heading level 6.</summary>
        h6,
        /// <summary>Document head.</summary>
        head,
        /// <summary>Horizontal rule.</summary>
        hr,
        /// <summary>HTML document root.</summary>
        html,
        /// <summary>Italic text.</summary>
        i,
        /// <summary>Border around elements in a form.</summary>
        fieldset,
        /// <summary>Image.</summary>
        img,
        /// <summary>Input control.</summary>
        input,
        /// <summary>Inserted text.</summary>
        ins,
        /// <summary>Searchable index.</summary>
        isindex,
        /// <summary>Keyboard text.</summary>
        kbd,
        /// <summary>Label.</summary>
        label,
        /// <summary>Caption of a fieldset element.</summary>
        legend,
        /// <summary>List item.</summary>
        li,
        /// <summary>Link.</summary>
        link,
        /// <summary>Image map.</summary>
        map,
        /// <summary>Menu.</summary>
        menu,
        /// <summary>Meta information.</summary>
        meta,
        /// <summary>Misspelling of <c>noframes</c>; kept for compatibility, see <see cref="noframes"/>.</summary>
        norames,
        /// <summary>Alternative content for users without client-side script support.</summary>
        noscript,
        //object, // embedded object
        /// <summary>Ordered list.</summary>
        ol,
        /// <summary>Misspelling of <c>font</c>; kept for compatibility, see <see cref="font"/>.</summary>
        ont,
        /// <summary>
        /// Footer/footnote entry with the leading "f" missing; not a real tag name.
        /// Kept for compatibility, see <see cref="tfoot"/> for the table footer tag.
        /// </summary>
        oot,
        /// <summary>Option group.</summary>
        optgroup,
        /// <summary>Option.</summary>
        option,
        /// <summary>Misspelling of <c>form</c>; kept for compatibility, see <see cref="form"/>.</summary>
        orm,
        /// <summary>Paragraph.</summary>
        p,
        /// <summary>Parameter.</summary>
        param,
        /// <summary>Preformatted text.</summary>
        pre,
        /// <summary>Short quotation.</summary>
        q,
        /// <summary>Window or frame of a frameset.</summary>
        frame,
        /// <summary>Frameset.</summary>
        frameset,
        /// <summary>Strikethrough text.</summary>
        s,
        /// <summary>Code sample.</summary>
        samp,
        /// <summary>Selection list.</summary>
        select,
        /// <summary>Small text.</summary>
        small,
        /// <summary>Span.</summary>
        span,
        /// <summary>Strikethrough text.</summary>
        strike,
        /// <summary>Strong (bold) text.</summary>
        strong,
        /// <summary>Subscript.</summary>
        sub,
        /// <summary>Superscript.</summary>
        sup,
        /// <summary>Table.</summary>
        table,
        /// <summary>Table body.</summary>
        tbody,
        /// <summary>Table cell.</summary>
        td,
        /// <summary>Text area.</summary>
        textarea,
        /// <summary>Table header cell.</summary>
        th,
        /// <summary>Table head.</summary>
        thead,
        /// <summary>Document title.</summary>
        title,
        /// <summary>Table row.</summary>
        tr,
        /// <summary>Teletype (monospaced) text.</summary>
        tt,
        /// <summary>Underlined text.</summary>
        u,
        /// <summary>Unordered list.</summary>
        ul,
        /// <summary>Variable part of a text.</summary>
        var,
        /// <summary>Preformatted text.</summary>
        xmp,

        // Correctly spelled names for the misspelled members above. They are
        // appended at the end so that no existing member changes its value.

        /// <summary>Default font, color or size of the text on a page (deprecated).</summary>
        basefont,
        /// <summary>Definition term.</summary>
        dfn,
        /// <summary>Font, size and color of text.</summary>
        font,
        /// <summary>Alternative content for users without frame support.</summary>
        noframes,
        /// <summary>Table footer.</summary>
        tfoot,
    }
}
